1. Load and prepare
data
# Load data
excel_path <- "../CRC_ProteoResistance_data/CPMSF_GPPF-PM-43_CPPF-PM-43_results_20240119.xlsx"
quan_df <- read_excel(excel_path, sheet = "Quan_data")
# Standardize first 3 columns
colnames(quan_df)[1:3] <- c("protein_id", "gene_symbol", "description")
colnames(quan_df) <- str_replace_all(colnames(quan_df), "TH9616", "TH9619")
# Clean intensities
for (i in 4:ncol(quan_df)) {
quan_df[[i]] <- suppressWarnings(as.numeric(quan_df[[i]]))
quan_df[[i]][is.na(quan_df[[i]]) | quan_df[[i]] <= 0] <- 1e-3
}
# Matrix & metadata
assay_data <- as.matrix(quan_df[, 4:ncol(quan_df)])
rownames(assay_data) <- make.unique(quan_df$gene_symbol)
row_metadata <- quan_df[, 1:3]
rownames(row_metadata) <- rownames(assay_data)
# Sample metadata
sample_names <- colnames(assay_data)
cell_line <- case_when(
str_detect(sample_names, "HCT116") ~ "HCT116",
str_detect(sample_names, "SW620") ~ "SW620",
TRUE ~ NA_character_
)
treatment <- case_when(
str_detect(sample_names, "parental") ~ "Parental",
str_detect(sample_names, "TH9619") ~ "TH9619_R",
str_detect(sample_names, "MTX") ~ "MTX_R",
TRUE ~ "Unknown"
)
group <- paste(cell_line, treatment, sep = ".")
col_metadata <- DataFrame(
sample = sample_names,
cell_line = cell_line,
treatment = treatment,
group = group,
row.names = sample_names
)
# SE object
se <- SummarizedExperiment(
assays = list(norm = assay_data),
rowData = row_metadata,
colData = col_metadata
)
2. PCA plot
mat <- assay(se)
mat <- mat[apply(mat, 1, function(x) sd(x, na.rm = TRUE) > 0), ]
pca <- prcomp(t(mat), scale. = TRUE)
cd <- as.data.frame(colData(se))
fviz_pca_ind(pca,
geom.ind = "point",
col.ind = cd$group,
palette = "jco",
title = "PCA of Normalized Proteins")

3. Volcano plots &
DEGs
cell_lines <- unique(colData(se)$cell_line)
de_results <- list()
deg_filtered <- list()
for (cl in cell_lines) {
se_cl <- se[, colData(se)$cell_line == cl]
treatments <- unique(se_cl$treatment)
treatments <- treatments[treatments != "Parental"]
for (tr in treatments) {
comp_name <- paste0(cl, "_", tr, "_vs_Parental")
selected <- se_cl[, se_cl$treatment %in% c("Parental", tr)]
selected$treatment <- droplevels(factor(selected$treatment))
design <- model.matrix(~ 0 + selected$treatment)
colnames(design) <- levels(selected$treatment)
fit <- lmFit(assay(selected), design)
contrast_matrix <- makeContrasts(paste0(tr, " - Parental"), levels = design)
fit2 <- contrasts.fit(fit, contrast_matrix)
fit2 <- eBayes(fit2)
top_res <- topTable(fit2, coef = 1, number = Inf) %>%
rownames_to_column("gene_symbol") %>%
mutate(comparison = comp_name)
degs <- top_res %>% filter(adj.P.Val < 0.05, abs(logFC) > 1)
de_results[[comp_name]] <- top_res
deg_filtered[[comp_name]] <- degs
}
}
4. Volcano plots
for (comp in names(deg_filtered)) {
df <- de_results[[comp]] %>%
mutate(Regulation = case_when(
adj.P.Val < 0.005 & logFC > 2 ~ "Up",
adj.P.Val < 0.005 & logFC < -2 ~ "Down",
TRUE ~ "NotSig"
))
top10 <- df %>%
filter(adj.P.Val < 0.05, abs(logFC) > 2) %>%
arrange(adj.P.Val) %>%
slice(1:10)
p <- ggplot(df, aes(x = logFC, y = -log10(adj.P.Val))) +
geom_point(aes(color = Regulation), alpha = 0.6) +
geom_point(data = top10, shape = 21, fill = NA, color = "black", size = 2.5, stroke = 0.8) +
geom_text_repel(data = top10, aes(label = gene_symbol), color = "black", size = 3) +
geom_vline(xintercept = c(-2, 2), linetype = "dashed", color = "blue") +
geom_hline(yintercept = -log10(0.0005), linetype = "dashed", color = "red") +
scale_color_manual(values = c("Up" = "firebrick", "Down" = "royalblue", "NotSig" = "grey70")) +
theme_minimal(base_size = 12) +
labs(title = paste("Volcano Plot –", comp),
x = "log2 Fold Change",
y = "-log10 Adjusted P-Value")
print(p)
}




5. DEG table
for (comp in names(deg_filtered)) {
dat <- deg_filtered[[comp]] %>%
arrange(adj.P.Val) %>%
dplyr::select(gene_symbol, logFC, adj.P.Val, AveExpr)
DT::datatable(dat,
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
paste("Top DEGs for", comp)),
options = list(pageLength = 10, scrollX = TRUE))
}
6. Functional
Enrichment (KEGG)
library(clusterProfiler)
library(org.Hs.eg.db)
enrich_results <- list()
for (comp in names(deg_filtered)) {
genes <- deg_filtered[[comp]]$gene_symbol
# Map gene symbols to ENTREZ IDs
entrez_ids <- tryCatch({
bitr(genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
}, error = function(e) {
message("❌ Failed to convert symbols for ", comp, ": ", e$message)
return(NULL)
})
if (!is.null(entrez_ids) && nrow(entrez_ids) > 5) {
# Run KEGG enrichment
kegg_res <- tryCatch({
enrichKEGG(gene = entrez_ids$ENTREZID, organism = "hsa")
}, error = function(e) {
message("❌ KEGG enrichment failed for ", comp, ": ", e$message)
return(NULL)
})
# Check if valid and non-empty
if (!is.null(kegg_res) &&
inherits(kegg_res, "enrichResult") &&
nrow(kegg_res@result) > 0 &&
"Description" %in% colnames(kegg_res@result) &&
any(!is.na(kegg_res@result$Description))) {
enrich_results[[comp]] <- kegg_res
tryCatch({
print(barplot(kegg_res,
showCategory = 10,
title = paste("KEGG Pathways -", comp)))
}, error = function(e) {
message("⚠️ Plotting failed for ", comp, ": ", e$message)
})
} else {
message("⚠️ No enriched KEGG terms found for ", comp)
}
} else {
message("⚠️ Not enough valid genes mapped for ", comp)
}
}


library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
go_results <- list()
for (comp in names(deg_filtered)) {
genes <- deg_filtered[[comp]]$gene_symbol
entrez_ids <- tryCatch({
bitr(genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
}, error = function(e) {
message("❌ Failed to convert symbols for ", comp, ": ", e$message)
return(NULL)
})
if (!is.null(entrez_ids) && nrow(entrez_ids) > 5) {
# Biological Process (BP)
go_bp <- tryCatch({
enrichGO(gene = entrez_ids$ENTREZID,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
readable = TRUE)
}, error = function(e) NULL)
# Molecular Function (MF)
go_mf <- tryCatch({
enrichGO(gene = entrez_ids$ENTREZID,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
readable = TRUE)
}, error = function(e) NULL)
# Cellular Component (CC)
go_cc <- tryCatch({
enrichGO(gene = entrez_ids$ENTREZID,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
readable = TRUE)
}, error = function(e) NULL)
# Store in list
go_results[[comp]] <- list(
BP = go_bp,
MF = go_mf,
CC = go_cc
)
# Plot if not empty
if (!is.null(go_bp) && nrow(go_bp) > 0) {
print(barplot(go_bp, showCategory = 10,
title = paste("GO: Biological Process -", comp)))
} else {
message("⚠️ No GO:BP terms enriched for ", comp)
}
if (!is.null(go_mf) && nrow(go_mf) > 0) {
print(barplot(go_mf, showCategory = 10,
title = paste("GO: Molecular Function -", comp)))
} else {
message("⚠️ No GO:MF terms enriched for ", comp)
}
if (!is.null(go_cc) && nrow(go_cc) > 0) {
print(barplot(go_cc, showCategory = 10,
title = paste("GO: Cellular Component -", comp)))
} else {
message("⚠️ No GO:CC terms enriched for ", comp)
}
} else {
message("⚠️ Not enough mappable genes for ", comp)
}
}







# Create multi-panel GO enrichment plots (barplots) for each comparison
for (comp in names(go_results)) {
plots <- list()
for (ont in c("BP", "MF", "CC")) {
res <- go_results[[comp]][[ont]]
if (!is.null(res) && nrow(res) > 0) {
p <- barplot(res, showCategory = 10) +
ggtitle(paste(ont, "-", comp)) +
theme_minimal(base_size = 12)
plots[[ont]] <- p
}
}
# Combine all available plots (BP, MF, CC) for the current comparison
if (length(plots) > 0) {
combined_plot <- wrap_plots(plots, ncol = 1)
print(combined_plot)
} else {
cat("### ⚠️ No GO terms enriched for", comp, "\n\n")
}
}



